---
title: "descriptive statistics and plots"
author: "Dai Yamao"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.width = 10, fig.height = 5)
rm(list=ls())
library(dplyr)
library(tidyverse)
library(ggplot2)
require(memisc)
library(stargazer)
library(psych)
library(gt)
library(summarytools)
library(fastDummies)
require(stringi)
require(ggpubr)

# Iraq
dat_iraq <- read_csv("data/Iraq.csv")
dat_iraq <- dat_iraq[-1,]

dat_iraq$RecordedDate <- as.Date(stri_match_first_regex(dat_iraq$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat_iraq$ceasefire <- NA
dat_iraq$ceasefire[dat_iraq$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat_iraq$ceasefire[as.Date("2025-01-17") <= dat_iraq$RecordedDate] <- "Post"
dat_iraq$ceasefire <- factor(dat_iraq$ceasefire)

dat_iraq <- dat_iraq |> 
  mutate(Gender = as.numeric(D1),
         Education = as.numeric(D3),
         Income = as.numeric(D5),
         Age_group = as.numeric(D2),
         Sect = case_when(D6 == 1 ~ "Sunni",
                          D6 == 2 ~ "Shia",
                          D6 == 3 ~ "Christian",
                          D6 == 4 ~ "Kurd",
                          D6 == 5 ~ "Others"),
         Iran = case_when(Q2_2 >= 4 ~ "Like",
                          Q2_2 == 3 ~ "Neither",
                          Q2_2 <= 2 ~ "Dislike"))

# Lebanon
dat_lebanon <- read_csv("data/Lebanon.csv")
dat_lebanon <- dat_lebanon[-1,]

dat_lebanon$RecordedDate <- as.Date(stri_match_first_regex(dat_lebanon$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat_lebanon$ceasefire <- NA
dat_lebanon$ceasefire[dat_lebanon$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat_lebanon$ceasefire[as.Date("2025-01-17") <= dat_lebanon$RecordedDate] <- "Post"
dat_lebanon$ceasefire <- factor(dat_lebanon$ceasefire)

dat_lebanon <- dat_lebanon |> 
  mutate(Gender = as.numeric(D1),
         Education = as.numeric(D3),
         Income = as.numeric(D5),
         Age_group = as.numeric(D2),
         Sect = case_when(D6 == 1 ~ "Sunni",
                          D6 == 2 ~ "Shia",
                          D6 == 3 ~ "Maronites",
                          D6 == 4 ~ "Druze",
                          D6 == 5 ~ "Ismail",
                          D6 == 6 ~ "Armenian",
                          D6 == 7 ~ "Greek",
                          D6 == 8 ~ "Protestant",
                          D6 == 9 ~ "Minorities",
                          D6 == 10 ~ "Multi-sectarian",
                          D6 == 13 ~ "Secular"),
         Iran = case_when(Q2_2 >= 4 ~ "Like",
                          Q2_2 == 3 ~ "Neither",
                          Q2_2 <= 2 ~ "Dislike"))

# Palestine
dat_palestine <- read_csv("data/Palestine.csv")
dat_palestine <- dat_palestine[-1,]

dat_palestine$RecordedDate <- as.Date(stri_match_first_regex(dat_palestine$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat_palestine$ceasefire <- NA
dat_palestine$ceasefire[dat_palestine$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat_palestine$ceasefire[as.Date("2025-01-17") <= dat_palestine$RecordedDate] <- "Post"
dat_palestine$ceasefire <- factor(dat_palestine$ceasefire)

dat_palestine <- dat_palestine |> 
  mutate(Gender = as.numeric(D1),
         Education = as.numeric(D3),
         Income = as.numeric(D5),
         Age_group = as.numeric(D2),
         Sect = case_when(D6 == 1 ~ "Muslim",
                          D6 == 2 ~ "Christian",
                          D6 == 3 ~ "Jewish",
                          D6 == 4 ~ "Others"),
         Iran = case_when(Q2_2 >= 4 ~ "Like",
                          Q2_2 == 3 ~ "Neither",
                          Q2_2 <= 2 ~ "Dislike"))

# Yemen
dat_yemen <- read_csv("data/Yemen.csv")
dat_yemen <- dat_yemen[-1,]

dat_yemen$RecordedDate <- as.Date(stri_match_first_regex(dat_yemen$RecordedDate, "\\d{4}-\\d{2}-\\d{2}"))

dat_yemen$ceasefire <- NA
dat_yemen$ceasefire[dat_yemen$RecordedDate <= as.Date("2025-01-16")] <- "Pre"
dat_yemen$ceasefire[as.Date("2025-01-17") <= dat_yemen$RecordedDate] <- "Post"
dat_yemen$ceasefire <- factor(dat_yemen$ceasefire)

dat_yemen <- dat_yemen |> 
  mutate(Gender = as.numeric(D1),
         Education = as.numeric(D3),
         Income = as.numeric(D5),
         Age_group = as.numeric(D2),
         Sect = case_when(D6 == 1 ~ "Sunni",
                          D6 == 2 ~ "Zaydi",
                          D6 == 3 ~ "Others"),
         Iran = case_when(Q2_2 >= 4 ~ "Like",
                          Q2_2 == 3 ~ "Neither",
                          Q2_2 <= 2 ~ "Dislike"))
```

# gender
```{r}
# IQ
table(dat_iraq$Gender)
gender_dat_iq <- data.frame(Gender = c("Female", "Male"),
                         n = c(227, 951))

p1 <- gender_dat_iq |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Gender, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Iraq")
p1

# LB
table(dat_lebanon$Gender)
gender_dat_lb <- data.frame(Gender = c("Female", "Male"),
                         n = c(439, 754))

p2 <- gender_dat_lb |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Gender, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Lebanon")
p2 

# PL
table(dat_palestine$Gender)
gender_dat_pl <- data.frame(Gender = c("Female", "Male"),
                         n = c(52, 83))

p3 <- gender_dat_pl |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Gender, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Palestine")
p3

# YM
table(dat_yemen$Gender)
gender_dat_ym <- data.frame(Gender = c("Female", "Male"),
                         n = c(217, 1304))

p4 <- gender_dat_ym |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Gender, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Yemen")
p4

# plot
p5 <- ggarrange(p3, p2,
                p1, p4,
                nrow = 2, ncol = 2, align = "hv")
p5

ggsave(filename = "result/gender_proportion.png",
       plot = p5,
       width = 7,
       height = 4,
       units = "in", 
       dpi = 600,
       bg = "white")
```

# age
```{r}
# IQ
table(dat_iraq$Age_group)
age_dat_iq <- data.frame(Age_group = c("18-20", "21-30", "31-40", "41-50", "51-60", "60-"),
                       n = c(239, 615, 203, 76, 27, 6))
p10 <- age_dat_iq |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Age_group, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Iraq")
p10

ggplot(data = dat_iraq, aes(x = Age_group, fill = Age_group)) +
  geom_bar() 

# LB
table(dat_lebanon$Age_group)
age_dat_lb <- data.frame(Age_group = c("18-20", "21-30", "31-40", "41-50", "51-60", "60-"),
                       n = c(134, 447, 349, 171,74, 10))
p11 <- age_dat_lb |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Age_group, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Lebanon")
p11

ggplot(data = dat_lebanon, aes(x = Age_group, fill = Age_group)) +
  geom_bar() 

# PL
table(dat_palestine$Age_group)
age_dat_pl <- data.frame(Age_group = c("18-20", "21-30", "31-40", "41-50", "51-60", "60-"),
                       n = c(15, 51, 41, 24, 4, 0))
p12 <- age_dat_pl |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Age_group, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Palestine")
p12

ggplot(data = dat_palestine, aes(x = Age_group, fill = Age_group)) +
  geom_bar() 

# YM
table(dat_yemen$Age_group)
age_dat_ym <- data.frame(Age_group = c("18-20", "21-30", "31-40", "41-50", "51-60", "60-"),
                       n = c(168, 661, 457, 190, 31, 1))
p13 <- age_dat_ym |>
  mutate(per = n / sum(n)) %>%
  mutate(label = paste0(Age_group, "\n", scales::percent(per, 0.1))) %>%
  ggplot(aes(x = 0, y = n, fill = factor(n))) +
  geom_col() +
  coord_polar("y") +
  geom_text(aes(label = label), position = position_stack(vjust = 0.5))+
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Yemen")

ggplot(data = dat_yemen, aes(x = Age_group, fill = Age_group)) +
  geom_bar() 

# plot
p14 <- ggarrange(p12, p11,
                 p10, p13,
                 nrow = 2, ncol = 2, align = "hv")
p14

ggsave(filename = "result/age_proportion.png",
       plot = p14,
       width = 15,
       height = 7,
       units = "in", 
       dpi = 600,
       bg = "white")
```

# sect
```{r}
# IQ
barplot(table(dat_iraq$Sect))

p20 <- ggplot(data = dat_iraq, aes(x = Sect, fill = Sect)) +
  geom_bar() +
  labs(title = "Iraq")
p20

# LB
barplot(table(dat_lebanon$Sect))

p21 <- ggplot(data = dat_lebanon, aes(x = Sect, fill = Sect)) +
  geom_bar() +
  labs(title = "Lebanon")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p21

# PL
barplot(table(dat_palestine$Sect))

p22 <- ggplot(data = dat_palestine, aes(x = Sect, fill = Sect)) +
  geom_bar() +
  labs(title = "Palestine")
p22

# YM
barplot(table(dat_yemen$Sect))

p23 <- ggplot(data = dat_yemen, aes(x = Sect, fill = Sect)) +
  geom_bar() +
  labs(title = "Yemen")
p23

# plot
p24 <- ggarrange(p22, p21,
                 p20, p23,
                 nrow = 2, ncol = 2, align = "hv")
p24

ggsave(filename = "result/sectarian_proportion.png",
       plot = p24,
       width = 15,
       height = 7,
       units = "in", 
       dpi = 600,
       bg = "white")
```

# descriptive statistics
```{r}
# IQ
dat_iraq <- dat_iraq |>
  mutate(Male = if_else(Gender == 1, 1, 0),
         Female = if_else(Gender == 2, 1, 0),
         Sunni = if_else(Sect == "Sunni", 1, 0),
         Shia = if_else(Sect == "Shia", 1, 0),
         Christian = if_else(Sect == "Christian", 1, 0),
         Kurd = if_else(Sect == "Kurd", 1, 0),
         Others = if_else(Sect == "Others", 1, 0),
         ceasefire_pre = if_else(ceasefire == "Pre", 1, 0),
         ceasefire_post = if_else(ceasefire == "Post", 1, 0),
         Iran_dislike = if_else(Iran == "Dislike", 1, 0),
         Iran_nither = if_else(Iran == "Neither", 1, 0),
         Iran_like = if_else(Iran == "Like", 1, 0))

desc_stat_iq <- dat_iraq |>
  dplyr::select(ceasefire_pre, ceasefire_post, Iran_dislike, Iran_nither, Iran_like,
                Sunni, Shia, Christian, Kurd, Others,
                Gender, Male, Female, Age, Education, Income)
str(desc_stat_iq)
summary(desc_stat_iq)
stargazer(as.data.frame(desc_stat_iq), type = "text", summary = TRUE)

summarytools::descr(desc_stat_iq, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p") |>
  memisc::write_html(file = "result/descriptive_statistics_iq.html")

df_desc_stat_iq <- summarytools::descr(desc_stat_iq, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p")

gt(df_desc_stat_iq,
   rownames_to_stub = TRUE) |>
  fmt_number(columns = 3:4, decimals = 3)

# LB
dat_lebanon <- dat_lebanon |>
  mutate(Male = if_else(Gender == 1, 1, 0),
         Female = if_else(Gender == 2, 1, 0),
         Sunni = if_else(Sect == "Sunni", 1, 0),
         Shia = if_else(Sect == "Shia", 1, 0),
         Maronites = if_else(Sect == "Maronites", 1, 0),
         Druze = if_else(Sect == "Druze", 1, 0),
         Ismail = if_else(Sect == "Ismail", 1, 0),
         Armenian = if_else(Sect == "Armenian", 1, 0),
         Greek = if_else(Sect == "Greek", 1, 0),
         Protestant = if_else(Sect == "Protestant", 1, 0),
         Minorities = if_else(Sect == "Minorities", 1, 0),
         Multi_sectarian = if_else(Sect == "Multi-sectarian", 1, 0),
         Secular = if_else(Sect == "Secular", 1, 0),
         ceasefire_pre = if_else(ceasefire == "Pre", 1, 0),
         ceasefire_post = if_else(ceasefire == "Post", 1, 0),
         Iran_dislike = if_else(Iran == "Dislike", 1, 0),
         Iran_nither = if_else(Iran == "Neither", 1, 0),
         Iran_like = if_else(Iran == "Like", 1, 0))

desc_stat_lb <- dat_lebanon |>
  dplyr::select(ceasefire_pre, ceasefire_post, Iran_dislike, Iran_nither, Iran_like,
                Sunni, Shia, Maronites, Druze, Ismail, Armenian,
                Greek, Protestant, Minorities, Multi_sectarian, Secular,
                Gender, Male, Female, Age, Education, Income)
str(desc_stat_lb)
summary(desc_stat_lb)
stargazer(as.data.frame(desc_stat_lb), type = "text", summary = TRUE)

summarytools::descr(desc_stat_lb, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p") |>
  memisc::write_html(file = "result/descriptive_statistics_lb.html")

df_desc_stat_lb <- summarytools::descr(desc_stat_lb, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p")

gt(df_desc_stat_lb,
   rownames_to_stub = TRUE) |>
  fmt_number(columns = 3:4, decimals = 3)

# PL 
dat_palestine <- dat_palestine |>
  mutate(Male = if_else(Gender == 1, 1, 0),
         Female = if_else(Gender == 2, 1, 0),
         Christian = if_else(Sect == "Christian", 1, 0),
         Jewish = if_else(Sect == "Jewish", 1, 0),
         Muslim = if_else(Sect == "Muslim", 1, 0),
         ceasefire_pre = if_else(ceasefire == "Pre", 1, 0),
         ceasefire_post = if_else(ceasefire == "Post", 1, 0),
         Iran_dislike = if_else(Iran == "Dislike", 1, 0),
         Iran_nither = if_else(Iran == "Neither", 1, 0),
         Iran_like = if_else(Iran == "Like", 1, 0))

desc_stat_pl <- dat_palestine |>
  dplyr::select(ceasefire_pre, ceasefire_post, Iran_dislike, Iran_nither, Iran_like,
                Christian, Muslim, Jewish,
                Gender, Male, Female, Age, Education, Income)
str(desc_stat_pl)
summary(desc_stat_pl)
stargazer(as.data.frame(desc_stat_pl), type = "text", summary = TRUE)

summarytools::descr(desc_stat_pl, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p") |>
  memisc::write_html(file = "result/descriptive_statistics_pl.html")

df_desc_stat_pl <- summarytools::descr(desc_stat_pl, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p")

gt(df_desc_stat_pl,
   rownames_to_stub = TRUE) |>
  fmt_number(columns = 3:4, decimals = 3)

# YM
dat_yemen <- dat_yemen |>
  mutate(Male = if_else(Gender == 1, 1, 0),
         Female = if_else(Gender == 2, 1, 0),
         Sunni = if_else(Sect == "Sunni", 1, 0),
         Zaydi = if_else(Sect == "Zaydi", 1, 0),
         Others = if_else(Sect == "Others", 1, 0),
         ceasefire_pre = if_else(ceasefire == "Pre", 1, 0),
         ceasefire_post = if_else(ceasefire == "Post", 1, 0),
         Iran_dislike = if_else(Iran == "Dislike", 1, 0),
         Iran_nither = if_else(Iran == "Neither", 1, 0),
         Iran_like = if_else(Iran == "Like", 1, 0))

desc_stat_ym <- dat_yemen |>
  dplyr::select(ceasefire_pre, ceasefire_post, Iran_dislike, Iran_nither, Iran_like,
                Sunni, Zaydi, Others,
                Gender, Male, Female, Age, Education, Income)
str(desc_stat_ym)
summary(desc_stat_ym)
stargazer(as.data.frame(desc_stat_ym), type = "text", summary = TRUE)

summarytools::descr(desc_stat_ym, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p") |>
  memisc::write_html(file = "result/descriptive_statistics_ym.html")

df_desc_stat_ym <- summarytools::descr(desc_stat_ym, stats = c("n.valid", "mean", "sd", "min", "max"), 
                    transpose = TRUE, order = "p")

gt(df_desc_stat_ym,
   rownames_to_stub = TRUE) |>
  fmt_number(columns = 3:4, decimals = 3)

```

